# Replication code for Taylor C. Boas and Amy Erica Smith, “Looks Like Me, Thinks Like Me: Descriptive Representation and Opinion Congruence in Brazil.” Latin American Research Review 54, 2 (2019).

# Analysis conducted in R 3.6.0 on MacOS 10.13.6

# NOTE: This file replicates Appendix Table 9 and Appendix Figure 6. We recommend running R replication files in the following order; please see readme.txt for details.
# 	1_merge_lapop.R
# 	2_merge_latinobarometro.R
# 	3_recode_reshape.R
# 	4_difference_in_distributions.R
# 	5_regressions.R
# 	6_civil_society_meeting.R
# 	7_mass_descriptives.R
# 	8_elite_descriptives.R
# 	9_mean_differences.R
# 	10_difference_in_distributions_ks.R
# 	11_elite_sample_simulation.R
# 	12_converts_vs_lifelong.R

# Set working directory as appropriate
# setwd('~/Dropbox/brazil_leg_surveys/replication/')

# Clear the workspace and load packages. Please make sure all necessary packages are installed.

# Start from an empty workspace (hidden, dot-prefixed objects included).
# The argument is spelled out as all.names=TRUE: the abbreviation `all` relies
# on partial argument matching, and `T` is an ordinary variable that can be
# reassigned.
rm(list=ls(all.names=TRUE))

# Hmisc supplies wtd.Ecdf() (weighted ECDFs) and latex() (table export).
library(Hmisc)

# Load the survey data frames used throughout this script; both files are
# expected in the working directory.
load('mass.data.RData')
load('elite.data.RData')

# Functions for calculating differences in CDFs

# Weighted empirical CDF factory.
#
# x:       sample values.
# weights: optional weights passed through to Hmisc::wtd.Ecdf (NULL = unweighted).
#
# Returns a function of y that gives, for each element of y, the height of the
# step function at the largest knot that is <= that element.
wtd.ecdf<-function(x,weights=NULL){
	function(y){
		raw<-wtd.Ecdf(x,weights=weights,normwt=TRUE,type='i/n')
		steps<-data.frame(knot=raw$x,height=raw$ecdf)
		# Push the first knot to -Inf so that every query value has at least one
		# knot at or below it (presumably this is wtd.Ecdf's leading zero-height
		# point -- TODO confirm against the Hmisc documentation).
		steps$knot[1]<- -Inf
		# Compare at 5 decimals: differences in precision were otherwise causing
		# equal numbers to not be recognized as such.
		knots<-round(steps$knot,5)
		lookup<-function(q) steps$height[max(which(knots<=round(q,5)))]
		sapply(y,lookup)
	}
}

# Area between the (unweighted) empirical CDFs of x and y.
#
# x, y: numeric vectors; NAs are ignored (sort() and ecdf() both drop them).
#
# Returns a single number: the sum, over consecutive pairs of distinct pooled
# values, of (gap between the two values) * |Fx - Fy| evaluated at the left
# value of the pair. Both CDFs are step functions that are constant on each
# such gap, so this is the exact area between them.
#
# When x and y share one single distinct value there are no gaps and the
# result is 0. (A `1:(length(vals)-1)` loop would run over c(1, 0) in that
# case and yield numeric(0).)
ecdf.diff<-function(x,y){
	vals<-sort(unique(c(x,y)))
	Fy<-ecdf(y)(vals)
	Fx<-ecdf(x)(vals)
	# Indices of the left endpoint of every gap; empty when there is one value.
	left<-seq_len(length(vals)-1)
	sum(abs(diff(vals)*(Fx[left]-Fy[left])))
}

# Area between the weighted empirical CDFs of x and y.
#
# x, y:                 numeric vectors of responses.
# weights.x, weights.y: weights for x and y, handed to wtd.ecdf(); NULL means
#                       unweighted.
#
# Returns a single number: the sum, over consecutive pairs of distinct pooled
# values, of (gap between the two values) * |Fx - Fy| evaluated at the left
# value of the pair.
#
# When x and y share one single distinct value there are no gaps and the
# result is 0. (A `1:(length(vals)-1)` loop would run over c(1, 0) in that
# case and yield numeric(0).)
wtd.ecdf.diff<-function(x,y,weights.x,weights.y){
	vals<-sort(unique(c(x,y)))
	Fy<-wtd.ecdf(y,weights=weights.y)(vals)
	Fx<-wtd.ecdf(x,weights=weights.x)(vals)
	# Indices of the left endpoint of every gap; empty when there is one value.
	left<-seq_len(length(vals)-1)
	sum(abs(diff(vals)*(Fx[left]-Fy[left])))
}

# Logical indicators for the three parties and two states that are counted
# separately in the valid-N table below. Columns are appended in the order
# listed: party flags first, then state flags.
elite.data[c('elite_pt','elite_psdb','elite_pmdb')]<-lapply(
	c('PT','PSDB','PMDB'),
	function(party) elite.data$elite_partythen==party)
elite.data[c('elite_mg','elite_sp')]<-lapply(
	c('MG','SP'),
	function(state) elite.data$elite_uf==state)

mass.data[c('mass_pt','mass_psdb','mass_pmdb')]<-lapply(
	c('PT','PSDB','PMDB'),
	function(party) mass.data$mass_party==party)
mass.data[c('mass_mg','mass_sp')]<-lapply(
	c('MG','SP'),
	function(state) mass.data$mass_uf==state)

# Constant column: summing it over the rows with a valid answer on an issue
# gives the full-sample N for that issue.
elite.data$one<-1
mass.data$one<-1

# =========================================================
# Appendix Table 9: Valid N (Unweighted) by Issue and Group 
# =========================================================

# Elite valid-N matrix: 10 rows (full sample plus nine group indicators) by
# 7 columns (one per issue). Each column sums the indicator columns over the
# respondents with a non-missing answer on that issue.
elite.n<-local({
	groups<-c('one','elite_evang','elite_female','elite_black','elite_loweduc',
		'elite_pt','elite_psdb','elite_pmdb','elite_mg','elite_sp')
	valid<-list(
		!is.na(elite.data$elite_econ),
		!is.na(elite.data$elite_ideol),
		!is.na(elite.data$elite_affirm),
		!is.na(elite.data$elite_classaffirm),
		!is.na(elite.data$elite_abortion),
		!is.na(elite.data$elite_gaymarriage),
		!is.na(elite.data$elite_environment))
	counts<-lapply(valid,function(keep) apply(elite.data[keep,groups],2,sum,na.rm=TRUE))
	matrix(unlist(counts),ncol=7,nrow=10,byrow=FALSE)
})

# Mass valid-N matrix, built the same way from the mass survey columns.
mass.n<-local({
	groups<-c('one','mass_evang','mass_female','mass_black','mass_loweduc',
		'mass_pt','mass_psdb','mass_pmdb','mass_mg','mass_sp')
	valid<-list(
		!is.na(mass.data$mass_econ),
		!is.na(mass.data$mass_ideol),
		!is.na(mass.data$mass_affirm),
		!is.na(mass.data$mass_classaffirm),
		!is.na(mass.data$mass_abortion),
		!is.na(mass.data$mass_gaymarriage),
		!is.na(mass.data$mass_environment))
	counts<-lapply(valid,function(keep) apply(mass.data[keep,groups],2,sum,na.rm=TRUE))
	matrix(unlist(counts),ncol=7,nrow=10,byrow=FALSE)
})

# Survey-year span shown above each issue column.
years<-c("'02--'13","'02--'13","'10--'13","'10--'13","'10--'13","'10--'13","'11--'13")

# Stack the year labels, the elite counts and the mass counts (formatted with
# thousands separators). Because of the year labels the result is a character
# matrix.
n.table<-rbind(years,elite.n,apply(mass.n,2,prettyNum,big.mark=','))
dimnames(n.table)<-list(
	c('Years',rep(c('All','Evangelical','Female','Afro-Brazilian','No College','Party: PT','Party: PSDB','Party: PMDB','State: MG','State: SP'),2)),
	c('Economic\nRegime','Ideology','Race\nQuotas','Class\nQuotas','Abortion','Gay\nMarriage','Environ-\nment'))

# Blank out zero counts; latex() prints NA as an empty cell (na.blank=TRUE).
is.na(n.table)<-n.table==0

# Write the table to n_table.tex.
n.table.latex<-latex(
	n.table,
	file='n_table.tex',
	collabel.just=rep('c',ncol(n.table)),
	col.just=rep('r',ncol(n.table)),
	rowlabel='',
	caption='Valid N (Unweighted) by Issue and Group',
	rgroup=c('','Elites','Masses'),
	n.rgroup=c(1,10,10),
	booktabs=FALSE,
	ctable=TRUE,
	where="htp",
	na.blank=TRUE,
	extracolsize='normalsize')

# =======================================================================
# Appendix Figure 6: Elite Sample Size and Congruence: Simulation Results 
# =======================================================================

# NOTE: Appendix Figure 6 was produced using this code, without setting a seed. Hence, results will differ slightly each time it is run.

# Affirmative action--race
#
# Draw 1000 resamples (with replacement) of 30, 20 and 10 valid elite
# responses, one resample per matrix column, and measure each resample's
# distance from the weighted mass distribution. Elite draws are unweighted
# (weights.x=NULL); mass responses use mass_pweight.

affirm.sample.30<-matrix(
	sample(elite.data$elite_affirm[!is.na(elite.data$elite_affirm)],size=30*1000,replace=TRUE),
	nrow=30,ncol=1000)
affirm.sample.20<-matrix(
	sample(elite.data$elite_affirm[!is.na(elite.data$elite_affirm)],size=20*1000,replace=TRUE),
	nrow=20,ncol=1000)
affirm.sample.10<-matrix(
	sample(elite.data$elite_affirm[!is.na(elite.data$elite_affirm)],size=10*1000,replace=TRUE),
	nrow=10,ncol=1000)

# apply() hands each column to wtd.ecdf.diff() as its first argument (x).
diff.affirm.sample.30<-apply(affirm.sample.30,2,wtd.ecdf.diff,
	y=mass.data$mass_affirm,weights.x=NULL,weights.y=mass.data$mass_pweight)
diff.affirm.sample.20<-apply(affirm.sample.20,2,wtd.ecdf.diff,
	y=mass.data$mass_affirm,weights.x=NULL,weights.y=mass.data$mass_pweight)
diff.affirm.sample.10<-apply(affirm.sample.10,2,wtd.ecdf.diff,
	y=mass.data$mass_affirm,weights.x=NULL,weights.y=mass.data$mass_pweight)

# Benchmark: distance using every elite response.
diff.affirm.all<-wtd.ecdf.diff(
	x=elite.data$elite_affirm,
	y=mass.data$mass_affirm,
	weights.x=NULL,
	weights.y=mass.data$mass_pweight)

# Affirmative action--class
#
# Draw 1000 resamples (with replacement) of 30, 20 and 10 valid elite
# responses, one resample per matrix column, and measure each resample's
# distance from the weighted mass distribution. Elite draws are unweighted
# (weights.x=NULL); mass responses use mass_pweight.

classaffirm.sample.30<-matrix(
	sample(elite.data$elite_classaffirm[!is.na(elite.data$elite_classaffirm)],size=30*1000,replace=TRUE),
	nrow=30,ncol=1000)
classaffirm.sample.20<-matrix(
	sample(elite.data$elite_classaffirm[!is.na(elite.data$elite_classaffirm)],size=20*1000,replace=TRUE),
	nrow=20,ncol=1000)
classaffirm.sample.10<-matrix(
	sample(elite.data$elite_classaffirm[!is.na(elite.data$elite_classaffirm)],size=10*1000,replace=TRUE),
	nrow=10,ncol=1000)

# apply() hands each column to wtd.ecdf.diff() as its first argument (x).
diff.classaffirm.sample.30<-apply(classaffirm.sample.30,2,wtd.ecdf.diff,
	y=mass.data$mass_classaffirm,weights.x=NULL,weights.y=mass.data$mass_pweight)
diff.classaffirm.sample.20<-apply(classaffirm.sample.20,2,wtd.ecdf.diff,
	y=mass.data$mass_classaffirm,weights.x=NULL,weights.y=mass.data$mass_pweight)
diff.classaffirm.sample.10<-apply(classaffirm.sample.10,2,wtd.ecdf.diff,
	y=mass.data$mass_classaffirm,weights.x=NULL,weights.y=mass.data$mass_pweight)

# Benchmark: distance using every elite response.
diff.classaffirm.all<-wtd.ecdf.diff(
	x=elite.data$elite_classaffirm,
	y=mass.data$mass_classaffirm,
	weights.x=NULL,
	weights.y=mass.data$mass_pweight)

# Abortion
#
# Draw 1000 resamples (with replacement) of 30, 20 and 10 valid elite
# responses, one resample per matrix column, and measure each resample's
# distance from the weighted mass distribution. Elite draws are unweighted
# (weights.x=NULL); mass responses use mass_pweight.

abortion.sample.30<-matrix(
	sample(elite.data$elite_abortion[!is.na(elite.data$elite_abortion)],size=30*1000,replace=TRUE),
	nrow=30,ncol=1000)
abortion.sample.20<-matrix(
	sample(elite.data$elite_abortion[!is.na(elite.data$elite_abortion)],size=20*1000,replace=TRUE),
	nrow=20,ncol=1000)
abortion.sample.10<-matrix(
	sample(elite.data$elite_abortion[!is.na(elite.data$elite_abortion)],size=10*1000,replace=TRUE),
	nrow=10,ncol=1000)

# apply() hands each column to wtd.ecdf.diff() as its first argument (x).
diff.abortion.sample.30<-apply(abortion.sample.30,2,wtd.ecdf.diff,
	y=mass.data$mass_abortion,weights.x=NULL,weights.y=mass.data$mass_pweight)
diff.abortion.sample.20<-apply(abortion.sample.20,2,wtd.ecdf.diff,
	y=mass.data$mass_abortion,weights.x=NULL,weights.y=mass.data$mass_pweight)
diff.abortion.sample.10<-apply(abortion.sample.10,2,wtd.ecdf.diff,
	y=mass.data$mass_abortion,weights.x=NULL,weights.y=mass.data$mass_pweight)

# Benchmark: distance using every elite response.
diff.abortion.all<-wtd.ecdf.diff(
	x=elite.data$elite_abortion,
	y=mass.data$mass_abortion,
	weights.x=NULL,
	weights.y=mass.data$mass_pweight)

# Gay marriage
#
# Draw 1000 resamples (with replacement) of 30, 20 and 10 valid elite
# responses, one resample per matrix column, and measure each resample's
# distance from the weighted mass distribution. Elite draws are unweighted
# (weights.x=NULL); mass responses use mass_pweight.

gaymarriage.sample.30<-matrix(
	sample(elite.data$elite_gaymarriage[!is.na(elite.data$elite_gaymarriage)],size=30*1000,replace=TRUE),
	nrow=30,ncol=1000)
gaymarriage.sample.20<-matrix(
	sample(elite.data$elite_gaymarriage[!is.na(elite.data$elite_gaymarriage)],size=20*1000,replace=TRUE),
	nrow=20,ncol=1000)
gaymarriage.sample.10<-matrix(
	sample(elite.data$elite_gaymarriage[!is.na(elite.data$elite_gaymarriage)],size=10*1000,replace=TRUE),
	nrow=10,ncol=1000)

# apply() hands each column to wtd.ecdf.diff() as its first argument (x).
diff.gaymarriage.sample.30<-apply(gaymarriage.sample.30,2,wtd.ecdf.diff,
	y=mass.data$mass_gaymarriage,weights.x=NULL,weights.y=mass.data$mass_pweight)
diff.gaymarriage.sample.20<-apply(gaymarriage.sample.20,2,wtd.ecdf.diff,
	y=mass.data$mass_gaymarriage,weights.x=NULL,weights.y=mass.data$mass_pweight)
diff.gaymarriage.sample.10<-apply(gaymarriage.sample.10,2,wtd.ecdf.diff,
	y=mass.data$mass_gaymarriage,weights.x=NULL,weights.y=mass.data$mass_pweight)

# Benchmark: distance using every elite response.
diff.gaymarriage.all<-wtd.ecdf.diff(
	x=elite.data$elite_gaymarriage,
	y=mass.data$mass_gaymarriage,
	weights.x=NULL,
	weights.y=mass.data$mass_pweight)

# Environment
#
# Draw 1000 resamples (with replacement) of 30, 20 and 10 valid elite
# responses, one resample per matrix column, and measure each resample's
# distance from the mass distribution. Unlike the other issues, this one uses
# the unweighted ecdf.diff(): no mass weights are applied.

environment.sample.30<-matrix(
	sample(elite.data$elite_environment[!is.na(elite.data$elite_environment)],size=30*1000,replace=TRUE),
	nrow=30,ncol=1000)
environment.sample.20<-matrix(
	sample(elite.data$elite_environment[!is.na(elite.data$elite_environment)],size=20*1000,replace=TRUE),
	nrow=20,ncol=1000)
environment.sample.10<-matrix(
	sample(elite.data$elite_environment[!is.na(elite.data$elite_environment)],size=10*1000,replace=TRUE),
	nrow=10,ncol=1000)

# apply() hands each column to ecdf.diff() as its first argument (x).
diff.environment.sample.30<-apply(environment.sample.30,2,ecdf.diff,y=mass.data$mass_environment)
diff.environment.sample.20<-apply(environment.sample.20,2,ecdf.diff,y=mass.data$mass_environment)
diff.environment.sample.10<-apply(environment.sample.10,2,ecdf.diff,y=mass.data$mass_environment)

# Benchmark: distance using every elite response.
diff.environment.all<-ecdf.diff(x=elite.data$elite_environment,y=mass.data$mass_environment)

# Economic regime--drawing sample equally from each BLS wave to obviate elite weights

# Wave-balancing weights: (1/3) divided by each wave's share of the valid
# responses, so that every wave carries the same total weight. The 1/3 assumes
# exactly three waves with valid responses -- TODO confirm for mass_blsyear.
# Respondents with a missing answer get an NA weight.
mass.weight.econ.all<-data.frame((1/3)/prop.table(table(mass.data$mass_blsyear[!is.na(mass.data$mass_econ)])))
mass.data$weight.econ.all<-ifelse(is.na(mass.data$mass_econ),NA,mass.weight.econ.all[match(mass.data$mass_blsyear, mass.weight.econ.all[,1]),2])
elite.weight.econ.all<-data.frame((1/3)/prop.table(table(elite.data$elite_year[!is.na(elite.data$elite_econ)])))
elite.data$weight.econ.all<-ifelse(is.na(elite.data$elite_econ),NA,elite.weight.econ.all[match(elite.data$elite_year, elite.weight.econ.all[,1]),2])

# Valid elite responses for each of the three waves, in wave order.
# which() is used because a bare logical index would insert an NA into the
# pool for every row whose elite_year is NA.
econ.pools<-lapply(c(2005,2009,2013),function(wave){
	elite.data$elite_econ[which(!is.na(elite.data$elite_econ)&elite.data$elite_year==wave)]
})

# Stacks `per.wave` draws from each wave's pool for each of `reps` resamples
# (one resample per column). Waves are drawn in pool order. Indexing through
# sample.int() avoids sample()'s special case for a length-one numeric pool,
# where it would sample from 1:x instead of from the pool.
econ.draw<-function(per.wave,reps=1000){
	do.call(rbind,lapply(econ.pools,function(pool){
		matrix(pool[sample.int(length(pool),per.wave*reps,replace=TRUE)],nrow=per.wave)
	}))
}

econ.sample.21<-econ.draw(7)
econ.sample.42<-econ.draw(14)
econ.sample.63<-econ.draw(21)

# Distance of each resample from the mass distribution. Elite draws are
# unweighted because the waves are already balanced by design; mass responses
# combine the survey weight with the wave-balancing weight.
diff.econ.sample.21<-apply(econ.sample.21, 2, function(x) wtd.ecdf.diff(x,mass.data$mass_econ,NULL, mass.data$mass_pweight*mass.data$weight.econ.all))
diff.econ.sample.42<-apply(econ.sample.42, 2, function(x) wtd.ecdf.diff(x,mass.data$mass_econ,NULL, mass.data$mass_pweight*mass.data$weight.econ.all))
diff.econ.sample.63<-apply(econ.sample.63, 2, function(x) wtd.ecdf.diff(x,mass.data$mass_econ,NULL, mass.data$mass_pweight*mass.data$weight.econ.all))

# Benchmark: every elite response, balanced across waves with the elite weights.
diff.econ.all<-wtd.ecdf.diff(elite.data$elite_econ,mass.data$mass_econ,elite.data$weight.econ.all,mass.data$mass_pweight*mass.data$weight.econ.all)

# Ideology--drawing sample equally from each BLS wave to obviate elite weights

# Wave-balancing weights: (1/3) divided by each wave's share of the valid
# responses, so that every wave carries the same total weight. The 1/3 assumes
# exactly three waves with valid responses -- TODO confirm for mass_blsyear.
# Respondents with a missing answer get an NA weight.
mass.weight.ideol.all<-data.frame((1/3)/prop.table(table(mass.data$mass_blsyear[!is.na(mass.data$mass_ideol)])))
mass.data$weight.ideol.all<-ifelse(is.na(mass.data$mass_ideol),NA,mass.weight.ideol.all[match(mass.data$mass_blsyear, mass.weight.ideol.all[,1]),2])
elite.weight.ideol.all<-data.frame((1/3)/prop.table(table(elite.data$elite_year[!is.na(elite.data$elite_ideol)])))
elite.data$weight.ideol.all<-ifelse(is.na(elite.data$elite_ideol),NA,elite.weight.ideol.all[match(elite.data$elite_year, elite.weight.ideol.all[,1]),2])

# Valid elite responses for each of the three waves, in wave order.
# which() is used because a bare logical index would insert an NA into the
# pool for every row whose elite_year is NA.
ideol.pools<-lapply(c(2005,2009,2013),function(wave){
	elite.data$elite_ideol[which(!is.na(elite.data$elite_ideol)&elite.data$elite_year==wave)]
})

# Stacks `per.wave` draws from each wave's pool for each of `reps` resamples
# (one resample per column). Waves are drawn in pool order. Indexing through
# sample.int() avoids sample()'s special case for a length-one numeric pool,
# where it would sample from 1:x instead of from the pool.
ideol.draw<-function(per.wave,reps=1000){
	do.call(rbind,lapply(ideol.pools,function(pool){
		matrix(pool[sample.int(length(pool),per.wave*reps,replace=TRUE)],nrow=per.wave)
	}))
}

ideol.sample.21<-ideol.draw(7)
ideol.sample.42<-ideol.draw(14)
ideol.sample.63<-ideol.draw(21)

# Distance of each resample from the mass distribution. Elite draws are
# unweighted because the waves are already balanced by design; mass responses
# combine the survey weight with the wave-balancing weight.
diff.ideol.sample.21<-apply(ideol.sample.21, 2, function(x) wtd.ecdf.diff(x,mass.data$mass_ideol,NULL, mass.data$mass_pweight*mass.data$weight.ideol.all))
diff.ideol.sample.42<-apply(ideol.sample.42, 2, function(x) wtd.ecdf.diff(x,mass.data$mass_ideol,NULL, mass.data$mass_pweight*mass.data$weight.ideol.all))
diff.ideol.sample.63<-apply(ideol.sample.63, 2, function(x) wtd.ecdf.diff(x,mass.data$mass_ideol,NULL, mass.data$mass_pweight*mass.data$weight.ideol.all))

# Benchmark: every elite response, balanced across waves with the elite weights.
diff.ideol.all<-wtd.ecdf.diff(elite.data$elite_ideol,mass.data$mass_ideol,elite.data$weight.ideol.all,mass.data$mass_pweight*mass.data$weight.ideol.all)

# Summary figure

# Distances to plot: one row per issue; columns are the full elite sample
# followed by the mean over resamples for the large, medium and small sizes.
ecdf.table<-matrix(
	c(
		diff.econ.all,mean(diff.econ.sample.63),mean(diff.econ.sample.42),mean(diff.econ.sample.21),
		diff.ideol.all,mean(diff.ideol.sample.63),mean(diff.ideol.sample.42),mean(diff.ideol.sample.21),
		diff.affirm.all,mean(diff.affirm.sample.30),mean(diff.affirm.sample.20),mean(diff.affirm.sample.10),
		diff.classaffirm.all,mean(diff.classaffirm.sample.30),mean(diff.classaffirm.sample.20),mean(diff.classaffirm.sample.10),
		diff.abortion.all,mean(diff.abortion.sample.30),mean(diff.abortion.sample.20),mean(diff.abortion.sample.10),
		diff.gaymarriage.all,mean(diff.gaymarriage.sample.30),mean(diff.gaymarriage.sample.20),mean(diff.gaymarriage.sample.10),
		diff.environment.all,mean(diff.environment.sample.30),mean(diff.environment.sample.20),mean(diff.environment.sample.10)
	),
	ncol=4,byrow=TRUE)

rownames(ecdf.table)<-c('Economic\nRegime\n(2002-13)','Ideology\n(2002-13)','Race\nQuotas\n(2010-13)','Class\nQuotas\n(2010-13)','Abortion\n(2010-13)','Gay\nMarriage\n(2010-13)','Environment\n(2011-13)')

# Elite sample size behind each cell of ecdf.table: the number of valid
# responses for the full sample, then the fixed resample sizes.
n<-cbind(
	colSums(!is.na(elite.data[,c('elite_econ','elite_ideol','elite_affirm','elite_classaffirm','elite_abortion','elite_gaymarriage','elite_environment')])),
	c(63,63,rep(30,5)),
	c(42,42,rep(20,5)),
	c(21,21,rep(10,5)))

# Vertical position of each cell: issues are centred at 14, 12, ..., 2 (top to
# bottom) and the four sample sizes are offset within each issue's band.
y<-outer(seq(14,2,by=-2),seq(-.75,.75,.5)*.75,`+`)

# Draw the figure to a PDF: each cell of ecdf.table is plotted at its distance
# (x) and band position (y), using the elite sample size as the plotting label.
pdf(file='wtd_ecdf_diff_points_plot_sample.pdf',height=10,width=7.5)
par(mar=c(5,6,2,2)+.1)
plot(
	NULL,
	xlab='Distance Between Mass and Elite Respondents',
	ylab='',
	yaxt='n',
	xlim=c(0,.4),
	ylim=range(y))
text(x=ecdf.table,y=y,labels=n)
# Dotted separators at the odd heights between issue bands.
abline(h=seq(17,3,by=-2),lty=3,lwd=1)
# Issue labels on the left, one per band centre, without tick marks.
Axis(side=2,at=seq(14,2,by=-2),labels=rownames(ecdf.table),las=2,hadj=0,tick=FALSE,line=4)
dev.off()
